
** Merged data: temp, fine1, coredu **

* Session setup: reset Stata, close any open log, and define the directory
* globals referenced by the sections below.
clear all
set more on
set mat 800
capture log close

* NOTE(review): global E is not defined anywhere in the visible part of this
* file before this line; presumably it is set outside this do-file - confirm.
cd "$E"

global path "E:\Project1_empirical" // project root directory
global census  "$path\Census"     // census data
global city    "$path\City"       // old city yearbook
global iv      "$path\IV"         // IV dataset
global chips   "$path\CHIPs"      // CHIPs dataset
global working_data = "$path\5th Edition (Age 20 to 30) Mean Controls\Working Data" // working data used in the regressions
global citynew "$path\City\Raw data" // mean vars over past years

* Disabled: extra ado-file search path.
*adopath +   "$path\adofiles\plus"  // ado files 

* Disabled: creation of an alternative working directory.
*cap mkdir "$working_data2035"

**————————————————————————————————————2002————————————————————————————————————**
* CHIP 2002 urban individual file: restrict to married persons aged 20 to 30,
* keep couples identified through the relationship code P103, and attach the
* husband and wife characteristics to every remaining row of the household
* (PCODE). Result is saved, still at individual level, as temp02.
use "$chips\2002\2002 DS0001[Urban Individual Income, Consumption, and Employment]\21741-0001-Data.dta", clear

bys PCODE: egen hsize=max(P102) // HH size, computed before any sample restriction
order hsize, after(P102)

keep if P106>=20 & P106<=30 // keep individuals aged 20 to 30
keep if P109==2 // married

* The relationship filter is applied BEFORE the pair tagging (same order as the
* 2013 section). Tagging first allowed a household to pass the pair test thanks
* to a member who was then removed by this filter, so a lone person with
* P103==3 ended up with spouse==3 and was kept as if it were a couple.
keep if (P103==1 | P103==2 | P103==3 | P103==4) // keep relationships

duplicates t PCODE, g(tag) // tag = number of other remaining rows sharing the PCODE
order tag, after(PCODE)
tab tag
drop if tag==0 // drop persons without a remaining partner in the household
drop tag

/*
drop in 107/108
drop in 215/216
drop in 329/330
drop in 333/334
for each`i'
*/
* Sum of relationship codes within the household identifies the couple type.
bys PCODE: egen spouse=sum(P103)
order spouse, after(P102)
tab spouse
keep if spouse==3 | spouse==7 // self=1 + spouse=2 or child=3 + child in law=4

	******Individual information ******
* Pattern used below: max(x*(P105==1)) picks the value of the row with P105==1
* (husband) and max(x*(P105==2)) the row with P105==2 (wife); rows of the other
* sex contribute 0, so the result is 0 when the spouse's own value is missing.
bys PCODE: egen hage=max(P106*(P105==1))                           /* husband age*/
bys PCODE: egen wage=max(P106*(P105==2))                           /* wife age */
bys PCODE: egen hedu=max(P113*(P105==1))                           /* husband year of schooling*/
bys PCODE: egen wedu=max(P113*(P105==2))                           /* wife year of schooling*/
bys PCODE: egen hedulevel=max(P112*(P105==1))                      /* husband education level */
bys PCODE: egen wedulevel=max(P112*(P105==2))                      /* wife education level */

* NOTE(review): a missing P107 compares greater than 1 in Stata, so persons
* with missing work status are coded work=0 - confirm this is intended.
gen work=1               // working
replace work=0 if P107>1 // not working
bys PCODE: egen hwork=max(work*(P105==1))
bys PCODE: egen wwork=max(work*(P105==2)) // work status of husband and wife

tab P107 // 82.77% individuals working or employed (figure recorded before the filter reordering)

gen hukou=1 // non-agriculture
replace hukou=0 if P104==2 | P104==4
bys PCODE: egen hhukou=max(hukou*(P105==1))
bys PCODE: egen whukou=max(hukou*(P105==2)) // hukou status of spouses

* NOTE(review): a missing P108 is not < 2, so it keeps minor=1 - confirm.
gen minor=1 // yes
replace minor=0 if P108<2
bys PCODE: egen hmin=max(minor*(P105==1))
bys PCODE: egen wmin=max(minor*(P105==2)) // minority indicator of spouses

bys PCODE: egen hgrades=max(P115*(P105==1))
bys PCODE: egen wgrades=max(P115*(P105==2)) // grades of individual

rename P201 INCOME02

* Individual income in each of the previous 5 years, by spouse. The variables
* are created in the order hinc98 winc98 ... hinc02 winc02; the later
* collapse relies on that order through the range hinc98-winc02.
global yr "98 99 00 01 02"
foreach y in $yr {
	bys PCODE: egen hinc`y'=max(INCOME`y'*(P105==1))
	bys PCODE: egen winc`y'=max(INCOME`y'*(P105==2))
}

/*
forvalues i=1 (1) 6 {
	gen t`i'=INCOME`i'

} */
rename CITY code
rename PCODE pcode

save "$working_data\temp02", replace

    ** Net individual labor income **
* Load the 2002 individual annual income file, attach the couple sample saved
* in temp02 by person code, compute net labor income per spouse, and collapse
* to one record per household (pcode).
use "$chips\2002\2002 DS0003[Urban Individual Annual Income Data]\21741-0003-Data.dta", clear

drop if A1==99 // 20983 individuals, 20933 unique member number //

keep CODE_P A1 A2 A6 A151 A152 A19 //A1: member code, A2: relationships, A6: gender, A151: work income, A152: management income, A19: personal income tax

* m:1 requires CODE_P to be unique in temp02; only matched persons are kept.
merge m:1 CODE_P using "$working_data\temp02"
keep if _merge==3
drop _merge

order P102 P103 P105 P106, after(CODE_P)

* Net labor income = work income minus personal income tax.
* NOTE(review): if A151 or A19 is missing the difference is missing and is
* ignored by max(), so the spouse's wage becomes 0; the 2007 and 2013 sections
* set missing income components to 0 first - confirm which is intended here.
bys pcode: egen hwage=max((A151-A19)*(P105==1))
bys pcode: egen wwage=max((A151-A19)*(P105==2))

* The range hinc98-winc02 depends on the variable order created in the
* individual-level section.
collapse (first) code hage wage hedu wedu hedulevel wedulevel hwork wwork hhukou ///
whukou hmin wmin hgrades wgrades hinc98-winc02 hwage wwage, by(pcode)

gen hage2=hage*hage
gen wage2=wage*wage
gen year=2002

*———————— if the age restriction changes, the row numbers below change too ————————————*
/*
replace hedu = 9 in 47 // junior high school //
replace wedu = 8 in 47 // junior high school //
replace hedu = 9 in 109 
*/
    ** gen prefecture, province code **
* prefect = first 4 characters of the city code, pv = first 2 characters.
tostring code, gen(p)

gen prefect=substr(p,1,4)
destring prefect, force replace

gen pv=substr(p,1,2)
destring pv, force replace

order prefect pv year, after(code)
drop p

* codebook pv, prefect
* 12 provinces, 62 prefects

    ** household labor income **
gen hhwage=hwage+wwage

save "$working_data\temp02", replace			 

*—————————————————————————————————sex ratios 2002———————————————————————————————
** prefecture level **——————————————————————————————————————————————————————————
* Build prefecture-level sex ratio and age-group population shares from the
* 2000 census file and attach them to the 2002 couple file by prefect.
use "$census\2000_county.dta", clear

tostring code, gen(p)
* Keep rows whose 5th and 6th characters are "00". The third argument of
* substr() is a LENGTH, not an end position: substr(p,5,6) only worked because
* no more than two characters follow position 4 in a 6-character code.
keep if substr(p,5,2)=="00"

* Each code is unique, so no by-group is needed; the sort keeps the row order
* that the former bys prefix produced.
sort code

* Census-2000 ages 18 to 28 (persons aged 20 to 30 in 2002): 2/5 of the 15-19
* band, the whole 20-24 band and 4/5 of the 25-29 band.
gen malepref=(male_15_19/5)*2+male_20_24+(male_25_29/5)*4
gen femalepref=(female_15_19/5)*2+female_20_24+(female_25_29/5)*4

gen srpref=malepref/femalepref // sex ratio of the 20 to 30 group

egen poppref=rowtotal(male_0-female_85) // total pop in 2000; relies on variable order in the census file
order poppref,after(county)

gen pop2030=malepref+femalepref // assumes total pop is unchanged between 2000 and 2002
gen share2030=pop2030/poppref // share of the 20 to 30 group

* Census-2000 ages 13 to 62 (persons aged 15 to 64 in 2002).
gen pop1564_1=((male_10_14+female_10_14)/5)*2+((male_60_64+female_60_64)/5)*3
egen pop1564_2=rowtotal(male_15_19-female_55_59)
gen pop1564=pop1564_1+pop1564_2
gen share1564=pop1564/poppref // share of the 15 to 64 group

gen prefect=substr(p,1,4), after(code)
destring prefect, force replace

keep code prefect county srpref poppref pop2030 share2030 pop1564 share1564

* merge sex ratio 2002
* 1:m (as in the 2007 and 2013 sections) instead of m:m: prefect must identify
* a single census row. If it does not, Stata now stops with an error rather
* than pairing rows by their position within the group.
merge 1:m prefect using "$working_data\temp02"
keep if _merge==3
drop _merge
order county, after(code)

save "$working_data\temp02", replace

*———————————————————————————— merge city info 2002 —————————————————————————————
* Attach city-level covariates to the 2002 couple file by prefect.
use "$citynew\temp02", clear
* Derive the 4-digit prefect key from the leading characters of code, then
* turn both variables back into numbers.
tostring code, force replace
gen prefect=substr(code,1,4),after (code)
destring code prefect,force replace

* Align city-file prefecture codes with the codes used in the couple file.
replace prefect=1101 if prefect==1100 // Beijing area
replace prefect=1423 if prefect==1411 // Lvliang district
replace prefect=5001 if prefect==5000 // Chongqing
replace prefect=5327 if prefect==5308 // Simao district
replace prefect=6223 if prefect==6206 // Wuwei district
replace prefect=6227 if prefect==6208 // Pingliang district

* 1:m (as in the 2007 and 2013 sections) instead of m:m: each prefect must
* appear once in the city file. If the recodes above ever create a duplicate
* key, Stata stops with an error instead of pairing rows by position.
merge 1:m prefect using "$working_data\temp02"
drop if _merge==1 // drop cities without households; unmatched households are kept
drop _merge

save "$working_data\temp02", replace // prefecture level, 206 obs, 151 vars //

********************************************************************************
**————————————————————————————————————2007————————————————————————————————————**
* CHIP 2007 (RUMiC 2008) urban file: restrict to persons aged 20 to 30 in a
* first marriage, keep couples identified through the relationship code a03,
* build husband and wife characteristics and net labor income, and collapse to
* one record per household (pcode). Result is saved as temp07.
use "$chips\2007 (RUMiC 2008)\CHIP2007_or_RUMiC2008_urban_data_20151222\UHS_w1_abc.dta", clear

gen age=2007-(a05_1), after(a05_1)

rename hhcode PCODE

bys PCODE: egen hsize=max(a02) // computed before any sample restriction
order hsize, after (a02)

*tab age if a03==7 // the age of grandchildren //
*tab a07 if a03==7 //376 unmarried in 380 people//

keep if age>=20 & age<=30 // keep individuals aged 20 to 30
keep if a07==2	// first marriage

* The relationship filter is applied BEFORE the pair tagging (same order as the
* 2013 section). Tagging first allowed a household to pass the pair test thanks
* to a member who was then removed by this filter, so a lone person with
* a03==3 ended up with spouse==3 and was kept as if it were a couple.
keep if (a03==1 | a03==2 | a03==3 | a03==6) // keep relationships

duplicates t PCODE,g(tag) // tag = number of other remaining rows sharing the PCODE
order tag, after(PCODE)
tab tag
drop if tag==0 // drop persons without a remaining partner in the household

* Sum of relationship codes within the household identifies the couple type.
bys PCODE: egen spouse=sum(a03)
order spouse, after(a03)
tab spouse // values 3 6 9 12 18 were recorded before the filter reordering
list PCODE if spouse==6 | spouse==12 | spouse==18 // spouse==6: brothers or sisters

keep if spouse==3 | spouse==9
/* 
drop if spouse==6
replace PCODE=44190000329920 in 455 //break into another pair
replace PCODE=44190000329920 in 456
drop in 464 //delete sister
*/ //612 people//
drop tag

tab spouse

***************************************
	******Individual information ******
* Pattern used below: max(x*(a04==1)) picks the value of the row with a04==1
* (husband) and max(x*(a04==2)) the row with a04==2 (wife); rows of the other
* sex contribute 0, so the result is 0 when the spouse's own value is missing.
bys PCODE: egen hage=max(age*(a04==1))                           /* husband age*/
bys PCODE: egen wage=max(age*(a04==2))                           /* wife age */
bys PCODE: egen hedu=max(b03*(a04==1))                           /* husband year of schooling*/
bys PCODE: egen wedu=max(b03*(a04==2))                           /* wife year of schooling*/
bys PCODE: egen hedulevel=max(b02*(a04==1))                      /* husband education level */
bys PCODE: egen wedulevel=max(b02*(a04==2))                      /* wife education level */

* NOTE(review): a missing a17 compares greater than 1 in Stata, so persons
* with missing work status are coded work=0 - confirm this is intended.
gen work=1               // working
replace work=0 if a17>1 // not working; 3 people waiting for a job assignment
bys PCODE: egen hwork=max(work*(a04==1))
bys PCODE: egen wwork=max(work*(a04==2)) // work status of husband and wife

* Percentages below were recorded before the filter reordering.
tab a17 // 88.42% employed, 4.36% unemployed 4.19% housemaker //
tab hsize // 2 21.81%, 3 39.60%, 4 18.12%, 5 17.45%, 6 2.35%, 7 and 8 0.34% //

gen hukou=1 // non-agriculture
replace hukou=0 if a14==3 | a14==4
bys PCODE: egen hhukou=max(hukou*(a04==1))
bys PCODE: egen whukou=max(hukou*(a04==2)) // hukou status of spouses

* NOTE(review): a missing a09 is not < 2, so it keeps minor=1 - confirm.
gen minor=1 // yes
replace minor=0 if a09<2
bys PCODE: egen hmin=max(minor*(a04==1))
bys PCODE: egen wmin=max(minor*(a04==2)) // minority indicator of spouses

bys PCODE: egen hgrades=max(b04*(a04==1)) // the higher the worse
bys PCODE: egen wgrades=max(b04*(a04==2)) // grades of individual

rename count code
rename PCODE pcode

    ** net labor income **
replace c17=0 if c17==.
replace c20=0 if c20==.

* Annual salary = 12 times the larger of c17 and c20.
gen salary=c17*12, after(c20) //167
replace salary=c20*12 if c20>=c17

bys pcode: egen hwage=max(salary*(a04==1)) // husband net labor income
bys pcode: egen wwage=max(salary*(a04==2)) // wife net labor income


collapse (first) code hage wage hedu wedu hedulevel wedulevel hwork wwork hhukou ///
whukou hmin wmin hgrades wgrades hwage wwage, by(pcode)

gen hage2=hage*hage
gen wage2=wage*wage
gen year=2007

drop if hedu==0 | wedu==0 | hedu==. | wedu==.

* prefect = first 4 characters of the code, pv = first 2 characters.
tostring code, gen(p)

gen prefect=substr(p,1,4)
destring prefect, force replace

gen pv=substr(p,1,2)
destring pv, force replace

order prefect pv year, after(code)
drop p

    ** net household labor income **
gen hhwage=hwage+wwage

save "$working_data\temp07", replace

*—————————————————————————————————sex ratios 2007———————————————————————————————
** prefecture level **——————————————————————————————————————————————————————————
* Build prefecture-level sex ratio and age-group population shares from the
* 2010 census file and attach them to the 2007 couple file by prefect.
use "$census\2010_county.dta", clear

tostring code, gen(p)
* Keep rows whose 5th and 6th characters are "00". The third argument of
* substr() is a LENGTH, not an end position: substr(p,5,6) only worked because
* no more than two characters follow position 4 in a 6-character code.
keep if substr(p,5,2)=="00"

* Each code is unique, so no by-group is needed; the sort keeps the row order
* that the former bys prefix produced.
sort code

* Census-2010 ages 23 to 33 (persons aged 20 to 30 in 2007): 2/5 of the 20-24
* band, the whole 25-29 band and 4/5 of the 30-34 band.
gen malepref=((male_20_24/5)*2+male_25_29+(male_30_34/5)*4)
gen femalepref=((female_20_24/5)*2+female_25_29+(female_30_34/5)*4)

gen srpref=malepref/femalepref // sex ratio of the 20 to 30 group

egen poppref=rowtotal(male_0-female_85) // total pop at county level; relies on variable order in the census file
order poppref,after(county)

gen pop2030=malepref+femalepref
gen share2030=pop2030/poppref // share of the 20 to 30 group

* Census-2010 ages 18 to 67 (persons aged 15 to 64 in 2007).
gen pop1564_1=((male_15_19+female_15_19)/5)*2+((male_65_69+female_65_69)/5)*3
egen pop1564_2=rowtotal(male_20_24-female_60_64)
gen pop1564=pop1564_1+pop1564_2
gen share1564=pop1564/poppref // share of the 15 to 64 group

gen prefect=substr(p,1,4), after(code)
destring prefect, force replace

keep code prefect county srpref poppref pop2030 share2030 pop1564 share1564

* merge sex ratio 2007

merge 1:m prefect using "$working_data\temp07"
keep if _merge==3
drop _merge
order county, after(code)

save "$working_data\temp07", replace // prefecture level, Obs=292 //


*———————————————————————————— merge city info 2007 —————————————————————————————
* Attach city-level covariates to the 2007 couple file; only prefectures
* present in both files are retained.
use "$citynew\temp07", clear

* 4-digit prefect key from the leading characters of code.
tostring code, force replace
gen prefect=substr(code,1,4), after(code)
destring code prefect, force replace

* Align city-file codes with the couple file: Shanghai and Chongqing.
recode prefect (3100=3101) (5000=5001)

merge 1:m prefect using "$working_data\temp07", keep(match) nogenerate

save "$working_data\temp07", replace // prefecture level, # Obs=292, 165 vars //


********************************************************************************
**————————————————————————————————————2013————————————————————————————————————**
* CHIP 2013 urban person file: restrict to persons aged 20 to 30 in a first
* marriage, keep couples identified through the relationship code a02, build
* husband and wife characteristics and net labor income, and collapse to one
* record per household (pcode). Result is saved as temp13.
use "$chips\2013\CHIP2013_urban_personal information\CHIP2013_urban_person.dta", clear		

gen age=2013-(a04_1), after(a04_1)

rename hhcode PCODE
* Computed before any sample restriction.
bys PCODE: egen hsize=max(person)
order hsize, after (person)
			 
keep if age>=20 & age<=30 // keep individuals aged 20 to 30
keep if a05==1 // first marriage //
keep if (a02==1 | a02==2 | a02==3 | a02==7) // keep relationships

* tag = number of other remaining rows sharing the PCODE.
duplicates t PCODE,g(tag)
order tag, after(PCODE)
tab tag
drop if tag==0 // drop persons without a remaining partner in the household

* Sum of relationship codes within the household identifies the couple type.
bys PCODE: egen spouse=sum(a02)
order spouse, after(a02)
tab spouse // spouse==6 or 9: brothers or sisters; spouse==13: two sons and one daughter-in-law; spouse==20: two couples
* spouse==20 could be split into two pairs
keep if spouse==3 | spouse==10


	******Individual information ******
* Pattern used below: max(x*(a03==1)) picks the value of the row with a03==1
* (husband) and max(x*(a03==2)) the row with a03==2 (wife); rows of the other
* sex contribute 0, so the result is 0 when the spouse's own value is missing.
bys PCODE: egen hage=max(age*(a03==1))                           /* husband age*/
bys PCODE: egen wage=max(age*(a03==2))                           /* wife age */
bys PCODE: egen hedu=max(a13_2*(a03==1))                           /* husband year of schooling*/  
bys PCODE: egen wedu=max(a13_2*(a03==2))                           /* wife year of schooling*/
bys PCODE: egen hedulevel=max(a13_1*(a03==1))
bys PCODE: egen wedulevel=max(a13_1*(a03==2))

* NOTE(review): a missing a19 compares greater than 1 in Stata, so persons
* with missing work status are coded work=0 - confirm this is intended.
gen work=1               // working
replace work=0 if a19>1 // not working
bys PCODE: egen hwork=max(work*(a03==1))
bys PCODE: egen wwork=max(work*(a03==2)) // work status of husband and wife

tab a19 // 85.94% employed, 4.26% homemaker, 3.69% other, 3.13% unemployed //

gen hukou=1 // non-agriculture
replace hukou=0 if a10==1
bys PCODE: egen hhukou=max(hukou*(a03==1))
bys PCODE: egen whukou=max(hukou*(a03==2)) // hukou status of spouses

* NOTE(review): a missing a06 is not < 2, so it keeps minor=1 - confirm.
gen minor=1 // yes
replace minor=0 if a06<2
bys PCODE: egen hmin=max(minor*(a03==1))
bys PCODE: egen wmin=max(minor*(a03==2)) // minority indicator of spouses

rename coun code
rename PCODE pcode

    ** net labor income **
* Missing income components are set to 0 so that their sum is never missing.
replace c05_1=0 if c05_1==.
replace c11_4=0 if c11_4==.

bys pcode: egen hwage=max((c05_1+c11_4)*(a03==1)) // husband net labor income
bys pcode: egen wwage=max((c05_1+c11_4)*(a03==2)) // wife net labor income

* No hgrades or wgrades in this wave, unlike 2002 and 2007.
collapse (first) code hage wage hedu wedu hedulevel wedulevel hwork wwork hhukou ///
whukou hmin wmin hwage wwage, by(pcode)

gen hage2=hage*hage
gen wage2=wage*wage
gen year=2013

drop if hedu==0 | wedu==0 | hedu==. | wedu==.

* prefect = first 4 characters of the code, pv = first 2 characters.
tostring code, gen(p)
gen prefect=substr(p,1,4)
destring prefect, force replace
gen pv=substr(p,1,2)
destring pv, force replace
order prefect pv year, after(code)
drop p

* Household labor income.
gen hhwage=hwage+wwage

save "$working_data\temp13", replace // # Obs: 349, vars: 19, 349 HH //		 

*—————————————————————————————————sex ratios 2013———————————————————————————————
** prefecture level **——————————————————————————————————————————————————————————
* Build prefecture-level sex ratio and age-group population shares from the
* 2010 census file and attach them to the 2013 couple file by prefect.
use "$census\2010_county.dta", clear

tostring code, gen(p)
* Keep rows whose 5th and 6th characters are "00". The third argument of
* substr() is a LENGTH, not an end position: substr(p,5,6) only worked because
* no more than two characters follow position 4 in a 6-character code.
keep if substr(p,5,2)=="00"

* Each code is unique, so no by-group is needed; the sort keeps the row order
* that the former bys prefix produced.
sort code

* Census-2010 ages 17 to 27 (persons aged 20 to 30 in 2013): 3/5 of the 15-19
* band, the whole 20-24 band and 3/5 of the 25-29 band.
gen malepref=((male_15_19/5)*3+male_20_24+(male_25_29/5)*3)
gen femalepref=((female_15_19/5)*3+female_20_24+(female_25_29/5)*3)

gen srpref=malepref/femalepref // sex ratio of the 20 to 30 group

egen poppref=rowtotal(male_0-female_85) // total pop at county level; relies on variable order in the census file
order poppref,after(county)
gen pop2030=malepref+femalepref
gen share2030=pop2030/poppref // share of the 20 to 30 group

* Census-2010 ages 12 to 61 (persons aged 15 to 64 in 2013).
gen pop1564_1=((male_10_14+female_10_14)/5)*3+((male_60_64+female_60_64)/5)*2
egen pop1564_2=rowtotal(male_15_19-female_55_59)
gen pop1564=pop1564_1+pop1564_2
gen share1564=pop1564/poppref // share of the 15 to 64 group

gen prefect=substr(p,1,4), after(code)
destring prefect, force replace

keep code prefect county srpref poppref pop2030 share2030 pop1564 share1564

* merge sex ratio 2013 *

merge 1:m prefect using "$working_data\temp13"
keep if _merge==3
drop _merge

* pcode is converted to numeric; force turns any non-numeric value into missing.
destring pcode, force replace
order county, after(code)

* Recode prefect 1102 to 1101 and 5002 to 5001 before the city-info merge.
replace prefect=1101 if prefect==1102
replace prefect=5001 if prefect==5002

save "$working_data\temp13", replace // prefecture level, # Obs:349, vars: 26 //

*———————————————————————— merge city info 2013 —————————————————————————————————
* Attach city-level covariates to the 2013 couple file; households without a
* matching city row are retained, cities without households are not.
use "$citynew\temp13", clear

* 4-digit prefect key from the leading characters of code.
tostring code, force replace
gen prefect=substr(code,1,4), after(code)
destring code prefect, force replace

* Align city-file codes with the couple file: Beijing and Chongqing.
recode prefect (1100=1101) (5000=5001)
* City file has no rows for Wenshan, Dali and Dehong (Yunnan).

merge 1:m prefect using "$working_data\temp13", keep(match using) nogenerate

save "$working_data\temp13", replace // prefecture level, Obs: 349, vars: 161 //

*——————————————————————————append three waves data——————————————————————————————
* Stack the 2002, 2007 and 2013 couple files into a single file named temp.
* The pooled file is written once after the second wave and again after the third.
local stem "$working_data\temp"

use "`stem'02", clear

append using "`stem'07"
save "`stem'", replace

append using "`stem'13"
order county, after(code)
save "`stem'", replace

************************         merge complete       **************************
********************************************************************************

 